// Documentation: https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy 2x12 SOFT
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_2x12_SOFT] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// First pass: reads the bound LUMA texture and saves a 4-component texture
// named "in" that is three times as wide as LUMA and equally tall (see the
// WIDTH/HEIGHT directives above), i.e. three texels per source pixel.
// NOTE(review): the WHEN expression is written in reverse-Polish form and
// appears to enable the pass only when the output is more than 1.2x larger
// than LUMA in both width and height -- confirm against the mpv manual.

// Request 16-bit float arithmetic; when the extension macro is defined the
// shader computes in half precision, otherwise it falls back to 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red channel of the LUMA texel at pos + (x, y), scaled by LUMA_mul
// and converted to F. The coordinate is clamped to [0, sz], so reads outside
// the texture repeat the edge texel. Expects `pos` and `sz` to be in scope at
// the expansion site. The "* ivec2(1, 1) + ivec2(0, 0)" part is an identity.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Workgroup-shared 10x10 tile of input samples, indexed [plane][row][column]:
// the 8x8 area handled by hook() plus a one-texel border on every side.
shared F G[1][10][10];
// Pass "in": 3x3 convolution of the single-channel luma input into 12 output
// channels (three 4-component vectors r0, r1, r2). Each invocation handles one
// source pixel and writes three horizontally adjacent texels of the output.
void hook() {
	// Position of this invocation inside its workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Source pixel handled by this invocation (8x8 pixels per workgroup).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// First of the three output texels belonging to this source pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid LUMA coordinate; used as the clamp limit inside l0().
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative tile load: each invocation fills up to four cells of the
	// 10x10 tile (offsets 0 and 8 per axis, skipping indices >= 10).
	// G[0][ay][ax] holds the sample at workgroup origin + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the workgroup has finished its loads.
	barrier();
	// s0_<row>_<col>: the 3x3 neighbourhood around this pixel; s0_1_1 is the
	// pixel itself.
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// Accumulate: for every tap, a constant weight vector (one weight per
	// output channel) times the scalar sample at that tap.
	r0 += V4(1.399e-02, -8.613e-01, 4.510e-02, -6.198e-02) * s0_0_0;
	r1 += V4(3.144e-03, 3.465e-03, -1.064e-02, -3.830e-01) * s0_0_0;
	r2 += V4(1.107e-01, -1.557e-02, 5.595e-02, -4.578e-01) * s0_0_0;
	r0 += V4(6.443e-01, 8.875e-01, -8.281e-02, -2.051e-01) * s0_0_1;
	r1 += V4(-1.004e+00, 1.209e-02, 3.660e-02, 1.245e-01) * s0_0_1;
	r2 += V4(3.588e-01, 5.996e-01, -1.519e-01, 6.267e-01) * s0_0_1;
	r0 += V4(-6.438e-01, -9.581e-03, -3.701e-01, -2.779e-03) * s0_0_2;
	r1 += V4(-1.427e-02, -9.016e-03, -3.210e-02, 1.661e-02) * s0_0_2;
	r2 += V4(2.842e-02, 4.016e-01, 1.525e-01, -7.983e-02) * s0_0_2;
	r0 += V4(-3.874e-01, -9.402e-02, -4.577e-02, -1.556e-01) * s0_1_0;
	r1 += V4(-3.956e-03, 4.769e-02, 1.407e-02, -2.487e-01) * s0_1_0;
	r2 += V4(2.265e-01, -1.547e-02, -3.945e-04, 6.262e-01) * s0_1_0;
	r0 += V4(-3.448e-01, 7.295e-02, 8.072e-01, 8.032e-01) * s0_1_1;
	r1 += V4(1.008e+00, 1.200e-01, 8.973e-01, 6.333e-01) * s0_1_1;
	r2 += V4(-1.332e+00, -5.254e-01, -7.528e-01, -2.037e-01) * s0_1_1;
	r0 += V4(7.315e-01, 5.366e-03, -2.933e-01, -1.430e-01) * s0_1_2;
	r1 += V4(8.885e-03, 9.111e-02, -9.074e-01, -1.773e-02) * s0_1_2;
	r2 += V4(2.101e-01, -4.435e-01, 9.942e-02, -5.028e-01) * s0_1_2;
	r0 += V4(4.129e-01, -1.551e-02, -2.574e-01, 5.755e-03) * s0_2_0;
	r1 += V4(1.012e-03, -1.300e-02, -4.951e-03, -6.018e-02) * s0_2_0;
	r2 += V4(1.625e-02, 2.482e-02, -5.238e-02, -1.716e-01) * s0_2_0;
	r0 += V4(-3.211e-01, 1.562e-02, 1.657e-01, -9.189e-02) * s0_2_1;
	r1 += V4(-5.826e-04, -6.887e-01, 2.155e-02, 3.070e-01) * s0_2_1;
	r2 += V4(1.294e-01, -2.412e-02, 4.100e-01, -4.229e-01) * s0_2_1;
	r0 += V4(-9.839e-02, 2.218e-03, 3.019e-02, 1.475e-02) * s0_2_2;
	r1 += V4(-9.086e-04, 2.603e-01, -1.723e-02, -3.740e-01) * s0_2_2;
	r2 += V4(-2.375e-02, -3.243e-03, 2.389e-01, 5.905e-01) * s0_2_2;
	// Per output vector: add a constant offset (bias), clamp to [0, 1] and
	// store at opos + (0..2, 0).
	// NOTE(review): out_image is not declared in this file; presumably it is
	// supplied by the host (mpv) for compute passes -- confirm.
	r0 += V4(-4.804e-03, -1.492e-04, -1.916e-03, 1.624e-03);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-8.291e-05, -5.733e-04, 2.415e-06, 1.064e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-2.745e-03, -1.936e-04, -3.452e-03, -4.412e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_2x12_SOFT] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Second pass: reads the saved texture "in" (bound above) and saves a texture
// named "conv1" with the same layout: three times as wide as LUMA, equally
// tall, four components per texel. LUMA is bound as well; hook() uses its
// size to derive the clamp limit.
// NOTE(review): the WHEN expression is written in reverse-Polish form and
// appears to enable the pass only when the output is more than 1.2x larger
// than LUMA in both width and height -- confirm against the mpv manual.

// Request 16-bit float arithmetic; when the extension macro is defined the
// shader computes in half precision, otherwise it falls back to 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three 4-component texels of "in" that belong to source
// pixel pos + (x, y), i.e. texel columns 3*px + 0, 3*px + 1 and 3*px + 2,
// scaled by in_mul and converted to V4. The pixel coordinate is clamped to
// [0, sz] before the multiplication by 3, so out-of-range reads repeat the
// edge pixel. Expects `pos` and `sz` to be in scope at the expansion site.
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tiles, indexed [plane][row][column]: one 10x10 tile per
// input plane (the 8x8 area handled by hook() plus a one-texel border).
shared V4 G[3][10][10];
// Pass "conv1": 3x3 convolution over the three 4-component planes of "in"
// (12 input channels) producing three 4-component output vectors r0, r1, r2.
// Each invocation handles one source pixel and writes three horizontally
// adjacent texels of the output. Unlike the first pass, no constant offset is
// added before the final clamp.
void hook() {
	// Position of this invocation inside its workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Source pixel handled by this invocation (8x8 pixels per workgroup).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// First of the three output texels belonging to this source pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid pixel coordinate (taken from LUMA); clamp limit for l0-l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative tile load: each invocation fills up to four cells per plane
	// (offsets 0 and 8 per axis, skipping indices >= 10).
	// G[k][ay][ax] holds plane k at workgroup origin + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the workgroup has finished its loads.
	barrier();
	// s0_<row>_<col> / s1_<row>_<col>: 3x3 neighbourhoods of planes 0 and 1;
	// index 1_1 is the pixel itself. s0_* is reloaded with plane 2 further down.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0: for every tap, a constant 4x4 weight matrix times the
	// 4-component sample, accumulated into each of r0, r1 and r2.
	r0 += M4(7.525e-02, 8.272e-03, -2.692e-01, -1.001e-01, 6.181e-03, -1.355e-02, 2.170e-01, -1.117e-01, 2.588e-02, 1.514e-02, 1.091e-01, -1.172e-01, 8.581e-02, 1.130e-01, 7.322e-02, -1.881e-01) * s0_0_0;
	r1 += M4(-9.083e-03, 3.732e-02, 2.298e-01, 1.972e-02, -5.024e-03, 3.183e-02, -3.841e-02, -1.127e-01, -9.484e-03, -9.055e-02, -1.385e-01, -9.703e-02, 8.347e-02, -2.056e-01, -1.566e-02, 1.431e-01) * s0_0_0;
	r2 += M4(1.999e-02, -6.034e-03, -2.355e-04, 1.447e-01, -1.191e-02, 4.986e-02, -1.852e-02, 5.255e-02, 5.008e-02, -1.772e-02, -1.282e-02, 1.068e-01, -8.387e-02, 8.494e-03, -1.193e-01, -3.242e-01) * s0_0_0;
	r0 += M4(-1.876e-03, -6.489e-02, -1.694e-01, -1.441e-01, -1.585e-02, 2.953e-03, 1.749e-01, -5.515e-02, 2.684e-02, 1.715e-02, 4.613e-01, -2.354e-01, -3.176e-01, -5.840e-01, 1.194e-01, -3.311e-01) * s0_0_1;
	r1 += M4(-1.026e-01, 1.527e-03, 2.407e-01, 3.677e-02, 3.253e-02, -1.157e-01, -2.226e-02, -1.510e-03, 1.638e-01, -5.659e-02, 8.244e-02, -1.594e-01, -2.568e-01, -5.674e-01, -8.797e-01, 1.018e-01) * s0_0_1;
	r2 += M4(1.841e-02, -1.114e-01, -5.513e-02, 2.514e-01, 6.114e-02, 7.594e-02, 5.909e-02, -2.517e-02, 8.192e-02, -1.878e-03, 2.105e-02, 7.011e-01, 1.750e-01, 2.311e-01, -8.247e-01, 1.078e-01) * s0_0_1;
	r0 += M4(-4.142e-02, 9.331e-02, -1.926e-01, -4.285e-02, -8.313e-03, 4.041e-04, -9.432e-02, -3.161e-02, -2.227e-03, -6.469e-02, 3.684e-01, -2.171e-01, -2.236e-02, 1.161e-02, -6.895e-02, 1.879e-01) * s0_0_2;
	r1 += M4(-6.308e-02, 5.317e-02, 1.790e-01, -1.563e-02, 4.152e-02, 4.490e-02, -1.345e-01, -4.802e-02, 4.595e-02, 3.584e-02, -7.215e-03, -2.372e-02, 4.126e-03, -1.197e-01, -1.448e-01, 1.152e-01) * s0_0_2;
	r2 += M4(6.928e-02, -8.984e-02, -1.017e-02, 1.346e-01, 8.922e-02, 1.824e-01, 8.097e-02, -1.141e-02, -2.819e-02, -4.639e-02, 8.044e-02, -1.233e-01, 1.918e-01, -4.718e-01, 2.280e-01, -4.669e-01) * s0_0_2;
	r0 += M4(-1.290e-01, -1.392e-01, -5.530e-03, -2.440e-01, -1.466e-01, -4.135e-02, -1.484e-01, -1.194e-01, -6.228e-04, -3.257e-02, -4.468e-01, -1.431e-01, 1.000e+00, 4.095e-01, -4.307e-01, 2.583e-01) * s0_1_0;
	r1 += M4(-6.872e-02, 1.287e-03, 4.879e-02, -4.114e-02, 6.096e-03, -2.653e-02, -1.036e-01, -1.249e-01, 7.596e-02, 6.019e-02, 2.572e-01, -8.310e-03, -1.144e-02, 7.421e-02, 5.263e-01, 1.512e-01) * s0_1_0;
	r2 += M4(-3.472e-02, -8.319e-02, 1.216e-01, -3.765e-02, 7.592e-02, 1.274e-01, -7.484e-02, 5.197e-02, 4.666e-02, -1.864e-02, 8.452e-02, 1.573e-01, -1.682e-01, -1.547e-03, 2.355e-01, -1.968e-01) * s0_1_0;
	r0 += M4(-7.343e-03, -1.423e-01, 2.168e-01, -3.101e-01, -5.915e-02, 2.119e-01, -5.086e-02, 1.744e-01, 2.035e-01, 2.960e-01, -2.163e-01, -3.219e-01, -8.815e-01, -1.000e+00, -3.558e-01, -7.361e-01) * s0_1_1;
	r1 += M4(-1.557e-01, -2.094e-01, -4.226e-01, -5.955e-02, -7.082e-02, -9.924e-02, -1.051e-02, -2.568e-01, -3.176e-02, 5.450e-02, 9.008e-02, -4.405e-01, 9.944e-01, 1.000e+00, 1.000e+00, 6.592e-01) * s0_1_1;
	r2 += M4(-1.289e-04, -9.133e-02, -1.900e-02, -3.564e-01, 2.096e-01, 4.561e-01, 1.877e-01, -1.999e-01, 2.028e-01, 1.362e-01, 1.019e-01, 4.502e-01, -1.000e+00, -2.460e-01, -1.000e+00, 6.389e-01) * s0_1_1;
	r0 += M4(-3.413e-02, 1.522e-01, 2.666e-01, -2.568e-01, -7.675e-02, -6.712e-02, -1.000e+00, -4.935e-01, -7.297e-02, -1.587e-01, -1.752e-01, -3.256e-01, -1.000e-01, 1.000e+00, 4.942e-03, 2.711e-01) * s0_1_2;
	r1 += M4(7.305e-02, 6.333e-02, 2.020e-01, -9.332e-02, -1.028e-01, 8.507e-02, -7.478e-02, -2.794e-01, -2.788e-02, 7.113e-02, -9.447e-02, 3.812e-03, -2.745e-03, -4.186e-02, -2.788e-01, -3.740e-01) * s0_1_2;
	r2 += M4(-6.579e-02, 7.042e-03, -1.638e-01, 2.630e-01, 4.474e-02, 3.788e-01, -3.798e-01, -1.000e+00, 2.255e-01, 1.773e-01, -4.611e-02, 7.141e-01, 1.000e+00, 5.957e-01, 2.726e-01, 1.822e-01) * s0_1_2;
	r0 += M4(-4.413e-02, -8.110e-02, 1.733e-01, -4.716e-02, -1.948e-01, -3.406e-02, 6.315e-02, -1.191e-01, -2.999e-02, -1.333e-01, -8.745e-02, -3.727e-01, 2.600e-01, 1.831e-01, 7.347e-02, 1.570e-01) * s0_2_0;
	r1 += M4(-5.292e-02, -3.964e-02, -4.927e-02, -3.513e-02, 9.585e-02, 1.240e-02, -9.432e-02, -1.951e-03, 5.308e-02, -8.198e-02, -1.598e-01, -3.065e-02, -2.288e-01, 1.186e-02, -1.363e-01, -4.692e-02) * s0_2_0;
	r2 += M4(8.575e-02, -8.761e-02, 7.159e-02, 1.909e-01, 5.874e-02, 7.448e-02, -7.951e-02, -3.852e-02, -1.339e-02, -1.129e-01, 9.820e-02, 8.404e-02, -1.303e-01, 9.249e-02, 8.320e-02, -1.983e-01) * s0_2_0;
	r0 += M4(8.298e-02, -2.828e-01, 1.083e-01, -3.507e-01, -1.000e+00, -1.763e-01, -2.109e-01, -3.229e-02, 1.995e-02, 9.563e-02, 1.252e-02, -4.329e-01, -9.587e-02, -2.651e-01, 4.868e-01, 3.461e-01) * s0_2_1;
	r1 += M4(1.331e-01, -1.394e-02, -6.083e-02, 9.092e-02, 1.706e-01, -1.050e-03, -2.617e-01, 4.721e-01, 4.129e-02, 4.795e-02, 2.897e-02, -6.834e-02, -8.339e-01, -6.518e-02, 5.641e-02, -8.093e-01) * s0_2_1;
	r2 += M4(5.522e-02, -2.292e-01, 1.430e-01, -5.515e-02, -3.952e-03, 2.020e-01, -3.994e-01, -4.497e-01, -4.019e-02, 1.413e-02, 2.141e-01, 1.548e-01, -2.472e-01, -2.022e-01, 9.156e-01, 4.384e-02) * s0_2_1;
	r0 += M4(2.893e-02, 2.375e-02, 6.981e-02, -2.008e-01, -2.851e-02, 5.579e-02, -6.933e-01, -1.093e-01, 1.910e-03, -1.107e-01, -2.922e-01, -2.920e-01, 8.033e-02, 1.379e-01, 9.001e-02, 7.232e-02) * s0_2_2;
	r1 += M4(2.153e-02, 1.196e-02, 1.498e-01, -4.462e-02, -1.227e-01, -1.215e-01, -1.000e+00, 1.378e-01, 1.248e-01, 3.607e-02, -1.706e-01, 9.644e-02, 2.521e-01, -7.701e-02, -1.476e-01, 4.862e-02) * s0_2_2;
	r2 += M4(5.705e-02, -7.304e-02, 4.797e-02, 2.622e-01, -2.235e-01, 8.280e-02, -1.723e-01, 1.199e-02, 1.550e-01, -1.497e-01, 8.850e-02, -1.708e-01, 2.686e-01, 3.123e-03, 2.094e-01, 2.399e-01) * s0_2_2;
	// Plane 1 contributions (s1_*).
	r0 += M4(1.743e-02, 2.162e-02, -2.705e-01, 8.541e-03, -6.847e-01, -9.707e-01, -9.701e-03, -4.329e-01, 1.017e-01, -3.978e-01, -9.050e-01, -6.740e-02, -2.813e-02, -1.029e-02, -3.213e-01, 2.215e-01) * s1_0_0;
	r1 += M4(-6.438e-02, 1.218e-01, 1.275e-01, -9.679e-02, 6.449e-02, 9.014e-02, 3.034e-01, -2.684e-02, 2.263e-02, 3.026e-01, 4.867e-02, 2.220e-01, 3.647e-02, 3.076e-02, 8.321e-02, 1.903e-01) * s1_0_0;
	r2 += M4(3.959e-03, 1.115e-01, 5.470e-02, 1.020e-01, 4.647e-02, -1.686e-01, -4.564e-01, 6.777e-01, -2.129e-02, -8.355e-02, -7.396e-02, 2.056e-01, -3.353e-03, -1.276e-01, 5.279e-02, -6.756e-02) * s1_0_0;
	r0 += M4(-2.278e-03, -1.962e-02, 1.350e-01, 9.475e-02, -2.356e-01, 3.225e-01, -2.178e-01, 8.915e-02, 4.394e-02, 5.186e-02, -8.837e-01, 4.934e-01, 4.462e-02, 2.135e-01, 3.251e-01, 7.422e-02) * s1_0_1;
	r1 += M4(1.077e-02, 1.568e-01, 2.442e-01, 3.653e-02, 4.589e-01, -1.000e+00, -1.000e+00, 4.264e-01, 3.869e-02, 1.253e-01, 4.973e-02, 2.986e-01, 1.251e-01, 1.669e-01, 1.253e-03, 7.970e-02) * s1_0_1;
	r2 += M4(-1.456e-02, 1.795e-02, 1.353e-01, -1.255e-01, 7.803e-02, 2.321e-01, -1.931e-01, -2.728e-01, -1.303e-01, -3.080e-03, 3.760e-01, -9.116e-01, -1.401e-01, -3.980e-01, 9.876e-03, 9.862e-02) * s1_0_1;
	r0 += M4(1.749e-02, 7.756e-02, -3.959e-01, 1.424e-01, 1.929e-01, 4.029e-02, -4.165e-03, 9.807e-02, 4.771e-02, 9.015e-03, -2.281e-01, 2.560e-01, 2.636e-02, 1.018e-01, 4.183e-01, 7.684e-02) * s1_0_2;
	r1 += M4(-2.318e-02, 4.200e-03, -4.944e-02, 4.382e-02, 1.321e-01, 1.754e-01, -3.711e-02, -1.577e-01, -3.505e-02, -3.207e-02, -5.890e-02, 2.097e-02, -4.311e-02, -1.434e-01, -1.022e-01, 6.056e-02) * s1_0_2;
	r2 += M4(-2.209e-02, 1.783e-01, -3.688e-02, -1.647e-01, 6.135e-01, -2.452e-01, 4.898e-01, -4.224e-01, -3.689e-02, 5.741e-02, 3.279e-02, -1.440e-01, -1.575e-01, -3.849e-01, -7.024e-02, 6.273e-01) * s1_0_2;
	r0 += M4(-1.343e-01, -3.577e-02, -3.566e-01, -1.384e-01, 2.817e-01, -3.805e-02, 1.520e-03, -1.590e-01, 3.645e-02, -1.343e-01, 5.175e-01, 3.928e-02, 7.267e-02, -3.074e-02, 2.660e-01, 1.864e-01) * s1_1_0;
	r1 += M4(8.736e-03, 3.939e-02, 2.257e-01, 1.022e-01, 4.465e-02, -1.919e-01, 7.484e-02, -5.599e-02, -1.532e-01, -2.019e-01, -1.000e+00, -3.353e-01, -1.091e-01, 3.916e-02, 1.245e-01, 7.958e-02) * s1_1_0;
	r2 += M4(-9.057e-02, -1.241e-02, 2.014e-01, -4.626e-02, -9.226e-02, -1.763e-01, -4.973e-02, -2.377e-01, -6.554e-02, 1.675e-01, -1.138e-02, 2.292e-01, -8.253e-02, -2.262e-01, 1.119e-01, 1.293e-01) * s1_1_0;
	r0 += M4(9.010e-02, -4.271e-01, -1.000e+00, -8.031e-03, -9.351e-02, -7.534e-02, 3.852e-01, 2.037e-01, -9.456e-02, -1.981e-01, 9.799e-01, -1.531e-02, -1.146e-01, 2.724e-01, -4.912e-01, 1.457e-01) * s1_1_1;
	r1 += M4(-2.364e-01, 2.282e-01, -1.000e+00, 6.131e-02, -3.746e-01, -2.730e-02, -5.343e-01, -4.326e-01, -3.542e-01, -2.684e-01, -1.946e-01, -6.021e-02, -2.163e-01, -6.052e-02, 2.799e-01, 6.332e-02) * s1_1_1;
	r2 += M4(4.205e-01, 5.909e-02, -3.203e-01, -9.624e-01, -1.115e-02, 1.821e-01, 5.022e-01, 3.600e-02, 4.274e-01, 9.095e-02, -3.470e-01, -1.000e+00, -2.245e-03, -4.370e-01, 3.439e-01, 8.260e-02) * s1_1_1;
	r0 += M4(-2.725e-02, 1.172e-01, -2.215e-01, 3.598e-01, -2.058e-02, -1.938e-01, 5.189e-01, 1.907e-01, 1.074e-02, -4.596e-02, 6.904e-02, 7.984e-02, -6.037e-03, 2.144e-01, -9.785e-02, 2.963e-01) * s1_1_2;
	r1 += M4(-3.386e-02, 4.762e-02, 2.343e-01, 8.960e-02, -2.783e-01, -1.540e-01, -3.350e-01, -9.649e-02, -3.289e-03, 1.137e-02, 1.848e-01, -4.376e-03, 8.557e-03, -4.938e-02, -2.194e-01, -4.773e-02) * s1_1_2;
	r2 += M4(-4.333e-01, -8.242e-02, -6.248e-02, -1.000e+00, -8.459e-02, 1.903e-01, 1.575e-01, -1.179e-01, -2.487e-01, 9.155e-02, -1.597e-01, -3.643e-01, 2.681e-01, -2.356e-01, 1.325e-01, -6.117e-02) * s1_1_2;
	r0 += M4(-2.709e-01, 7.129e-02, 1.345e-01, -8.213e-02, -9.798e-02, -5.873e-02, 4.696e-02, -4.361e-02, 6.289e-02, -6.137e-02, -2.101e-01, 1.689e-01, 1.187e-02, -1.210e-02, 9.495e-02, 2.359e-01) * s1_2_0;
	r1 += M4(-1.116e-01, -8.051e-03, -1.783e-01, -2.173e-01, 7.713e-02, -1.681e-02, -1.096e-01, 5.927e-02, 9.691e-02, 6.013e-03, -4.719e-02, 5.006e-02, -5.400e-02, -4.347e-03, 1.444e-02, 4.294e-02) * s1_2_0;
	r2 += M4(1.607e-02, -3.888e-02, 1.254e-01, 1.706e-01, 4.800e-02, 1.372e-01, -1.541e-01, -8.599e-02, 8.272e-02, 1.190e-01, 1.061e-01, -2.887e-02, 1.276e-02, 2.746e-02, -1.128e-02, -3.060e-02) * s1_2_0;
	r0 += M4(-5.469e-02, -2.802e-01, -3.271e-01, -6.878e-02, 2.988e-02, 4.686e-02, -1.616e-01, -3.281e-02, 1.051e-01, 1.561e-01, -1.838e-01, 1.925e-01, -3.541e-02, 4.372e-02, 5.905e-02, 2.763e-01) * s1_2_1;
	r1 += M4(-1.000e+00, -3.535e-01, -1.000e+00, 2.964e-01, 1.616e-02, 3.699e-02, 1.080e-01, 9.888e-02, 3.504e-01, -2.541e-02, -2.183e-01, 1.566e-01, -1.716e-01, 6.138e-02, 6.659e-02, -1.207e-01) * s1_2_1;
	r2 += M4(2.066e-01, 2.575e-01, -3.623e-01, -9.319e-02, -2.438e-02, 7.291e-02, -1.151e-01, 4.321e-02, 3.700e-02, 2.359e-01, -1.526e-01, -1.068e-01, -7.251e-02, -1.567e-01, 3.512e-02, -3.867e-02) * s1_2_1;
	r0 += M4(-3.797e-02, 1.565e-01, 2.177e-01, 2.408e-02, 2.592e-02, -1.948e-02, -3.641e-02, -1.271e-01, -1.849e-02, -3.201e-02, 4.616e-02, -3.735e-02, 6.345e-03, 7.484e-02, -3.479e-03, 1.189e-01) * s1_2_2;
	r1 += M4(-5.804e-02, 2.224e-02, 1.251e-01, -3.038e-01, 1.520e-01, 2.760e-02, 6.016e-02, 7.404e-02, -8.219e-02, 6.735e-02, 2.422e-01, -5.336e-02, 3.610e-02, -8.163e-02, 7.029e-02, 2.130e-02) * s1_2_2;
	r2 += M4(-6.881e-01, 3.975e-01, -8.763e-02, -4.898e-02, 1.578e-02, 1.296e-02, -1.269e-01, 2.546e-03, -2.075e-01, 1.490e-01, -6.714e-02, 1.994e-03, 2.384e-02, -1.773e-01, -1.460e-02, 6.071e-02) * s1_2_2;
	// Reuse the s0_* variables for the 3x3 neighbourhood of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 contributions.
	r0 += M4(-1.581e-01, 3.173e-02, 1.352e-01, 5.784e-02, 3.306e-02, -5.529e-02, -1.994e-01, -1.613e-03, 1.620e-02, -3.006e-01, -1.398e-01, -1.041e-01, -7.710e-03, -9.894e-03, -1.995e-01, 6.520e-02) * s0_0_0;
	r1 += M4(-8.822e-02, 9.110e-02, -9.861e-02, -1.121e-01, 7.748e-03, -1.189e-01, -7.323e-02, 1.378e-01, 6.518e-02, 1.971e-01, -5.935e-01, -1.145e-01, -3.559e-02, -7.472e-03, -9.385e-02, 2.102e-02) * s0_0_0;
	r2 += M4(1.470e-02, -1.001e-01, 9.784e-02, 1.392e-01, 4.106e-02, -6.911e-02, -2.327e-01, -2.023e-01, 6.232e-02, 3.616e-02, -3.545e-01, 1.981e-01, -1.529e-02, -1.930e-02, -3.052e-02, 1.811e-01) * s0_0_0;
	r0 += M4(2.098e-01, 5.003e-01, 1.000e+00, 2.906e-01, -6.377e-02, -7.514e-03, -6.634e-01, -1.351e-01, -3.126e-02, -8.783e-02, -5.321e-01, 2.082e-02, -1.959e-02, -1.252e-01, 6.134e-02, 9.392e-02) * s0_0_1;
	r1 += M4(1.802e-01, 6.566e-02, 2.872e-01, -1.699e-01, -2.333e-02, -8.756e-02, -6.177e-02, 4.978e-02, 1.771e-02, -1.216e-01, 5.458e-01, 1.504e-01, -6.499e-02, 5.963e-02, 2.455e-01, 6.368e-02) * s0_0_1;
	r2 += M4(-1.465e-01, -2.236e-01, 7.711e-01, -3.699e-02, 4.385e-02, 9.129e-02, -1.365e-01, 3.408e-01, -7.145e-02, -1.663e-01, -3.839e-01, -2.416e-01, 5.365e-02, -2.132e-01, -1.705e-01, -8.344e-03) * s0_0_1;
	r0 += M4(3.287e-02, -1.988e-01, 8.535e-01, -2.395e-01, 7.366e-03, -3.813e-02, 2.376e-02, -6.999e-02, -1.209e-02, 1.906e-01, -1.673e-01, -1.813e-01, -1.890e-02, 6.036e-02, -2.465e-01, 1.603e-01) * s0_0_2;
	r1 += M4(-3.144e-02, 7.621e-03, -1.234e-01, -1.920e-01, -2.959e-02, 1.753e-02, 9.215e-02, 1.612e-02, -3.579e-02, 1.217e-02, 3.041e-01, -9.740e-03, -4.324e-02, -5.772e-02, 6.915e-02, 8.869e-02) * s0_0_2;
	r2 += M4(-2.709e-01, 7.382e-02, -1.949e-01, 1.861e-01, 7.392e-02, -3.986e-02, 4.700e-02, -6.310e-02, -6.578e-02, -6.967e-02, -1.859e-01, 1.452e-01, 3.139e-03, -1.790e-01, 4.329e-02, 1.192e-02) * s0_0_2;
	r0 += M4(-3.165e-02, 1.754e-01, 8.788e-02, 3.988e-02, -6.285e-02, 3.479e-01, 1.909e-01, 2.900e-01, 2.560e-01, -1.642e-01, 2.036e-01, 6.390e-02, -1.465e-01, -2.172e-01, 2.364e-01, 1.650e-01) * s0_1_0;
	r1 += M4(-1.256e-01, -1.104e-01, -1.371e-01, -6.578e-02, -1.801e-01, -1.999e-01, -3.258e-01, -7.427e-02, 5.226e-02, 6.697e-02, 3.778e-01, 8.061e-02, 1.406e-02, 6.133e-02, -1.643e-01, 9.060e-02) * s0_1_0;
	r2 += M4(1.019e-01, 1.285e-01, 1.378e-01, -1.507e-01, -7.033e-02, 1.222e-01, -1.293e-01, 1.190e-01, -5.253e-02, -1.567e-01, -2.082e-02, 1.924e-01, 1.739e-02, -8.714e-02, -5.773e-02, 1.995e-01) * s0_1_0;
	r0 += M4(6.300e-01, 5.950e-01, 8.311e-02, 7.859e-02, -9.686e-02, -3.330e-01, -4.053e-01, -1.518e-01, 1.134e-01, -3.246e-02, 1.382e-01, 5.903e-01, 1.391e-02, -5.561e-02, 4.172e-01, 2.014e-01) * s0_1_1;
	r1 += M4(-1.000e+00, -1.000e+00, -1.000e+00, -4.752e-01, -6.556e-02, -4.056e-01, 1.953e-01, -5.849e-02, -5.391e-01, 1.866e-01, 5.628e-01, -2.333e-01, -2.431e-01, -3.008e-01, -2.006e-02, 4.165e-02) * s0_1_1;
	r2 += M4(5.096e-01, -4.891e-01, 8.457e-01, -7.642e-01, -3.605e-01, 4.587e-01, 1.706e-01, -1.000e+00, -8.935e-02, -2.185e-01, 3.021e-01, -1.580e-01, -6.969e-02, -2.327e-01, -1.982e-01, 3.263e-01) * s0_1_1;
	r0 += M4(5.037e-03, -1.000e+00, -4.447e-01, -1.167e-01, 1.658e-02, 3.954e-02, 6.558e-02, 1.733e-02, -6.372e-02, 2.269e-01, -2.328e-01, 3.246e-02, 1.061e-02, 1.485e-01, 1.985e-01, 6.229e-02) * s0_1_2;
	r1 += M4(-2.093e-02, -1.446e-01, -1.600e-02, 3.425e-01, -1.183e-01, -2.202e-02, -7.008e-02, 5.846e-02, -5.310e-02, 1.248e-02, 1.608e-02, 3.204e-02, 2.432e-02, 2.430e-02, 1.106e-01, -2.893e-03) * s0_1_2;
	r2 += M4(-5.407e-01, -5.017e-01, -2.803e-01, 3.953e-02, 1.251e-01, 7.477e-02, -2.849e-02, 2.285e-02, -1.883e-02, -3.398e-01, 1.062e-01, 1.072e-01, -1.788e-01, 1.498e-02, -9.585e-02, -3.499e-01) * s0_1_2;
	r0 += M4(-3.538e-01, -1.491e-01, -1.973e-01, -1.186e-01, 2.791e-03, 2.026e-01, 3.119e-02, 1.984e-01, -1.171e-02, 8.594e-02, 2.177e-01, -6.607e-02, 5.289e-02, -8.891e-02, 1.829e-02, 1.461e-01) * s0_2_0;
	r1 += M4(1.449e-01, 1.030e-01, 1.836e-01, 2.237e-02, 2.297e-01, -6.369e-02, -3.004e-01, 2.108e-01, -6.566e-02, -2.402e-02, -4.704e-02, -7.058e-02, 3.146e-02, 2.459e-03, 5.913e-02, 6.522e-02) * s0_2_0;
	r2 += M4(8.380e-02, 8.492e-02, -1.726e-01, 1.391e-01, -1.195e-01, 2.622e-01, 6.348e-02, -6.066e-01, -7.564e-02, -2.410e-02, -4.044e-02, -1.580e-02, 1.612e-02, -1.254e-01, 4.672e-02, 1.180e-01) * s0_2_0;
	r0 += M4(8.152e-02, 1.566e-01, -2.922e-01, -3.261e-01, -4.468e-02, -1.577e-01, 2.635e-02, 6.220e-02, 3.189e-03, 2.580e-02, -3.272e-02, -8.914e-02, 4.445e-02, 8.131e-02, 7.400e-02, 7.516e-02) * s0_2_1;
	r1 += M4(4.057e-01, 1.034e-01, 3.818e-01, 4.845e-01, 4.265e-01, -1.050e-01, 1.671e-02, 2.390e-01, 5.050e-02, 4.415e-02, 5.872e-02, 1.611e-03, 1.189e-01, -5.421e-03, 9.824e-02, 2.402e-02) * s0_2_1;
	r2 += M4(4.665e-02, 2.183e-01, -5.805e-01, 2.969e-02, 9.588e-02, -2.075e-02, 2.994e-01, 8.149e-01, 1.087e-01, -2.141e-02, -3.335e-02, 5.734e-02, 2.763e-02, -9.065e-02, -3.618e-03, 1.318e-01) * s0_2_1;
	r0 += M4(-7.653e-02, -1.023e-01, -2.669e-01, -1.017e-01, 2.365e-02, -1.858e-01, 4.145e-01, -1.222e-01, 1.049e-02, -1.145e-02, 1.019e-01, 1.483e-02, 2.337e-02, 7.639e-02, 9.316e-02, 8.938e-02) * s0_2_2;
	r1 += M4(-3.451e-03, 9.178e-02, 2.414e-01, 5.089e-03, 2.833e-02, -5.927e-03, -4.168e-02, 6.243e-02, 3.674e-02, -4.283e-02, -9.317e-02, 4.555e-02, -2.322e-02, -3.640e-02, 9.692e-02, -4.656e-02) * s0_2_2;
	r2 += M4(-1.054e-01, -1.482e-01, -2.885e-01, 1.166e-01, -2.421e-02, 1.597e-01, -2.982e-02, -3.238e-02, -1.641e-02, 8.712e-02, 2.022e-02, 4.055e-02, -3.309e-02, -4.769e-02, 1.062e-02, 3.606e-02) * s0_2_2;
	// Clamp each output vector to [0, 1] and store it at opos + (0..2, 0).
	// NOTE(review): out_image is not declared in this file; presumably it is
	// supplied by the host (mpv) for compute passes -- confirm.
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_2x12_SOFT] -conv2
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Third pass: reads the saved texture "conv1" (bound above) and saves a
// texture named "conv2" with the same layout: three times as wide as LUMA,
// equally tall, four components per texel. LUMA is bound as well; hook() uses
// its size to derive the clamp limit.
// NOTE(review): the WHEN expression is written in reverse-Polish form and
// appears to enable the pass only when the output is more than 1.2x larger
// than LUMA in both width and height -- confirm against the mpv manual.

// Request 16-bit float arithmetic; when the extension macro is defined the
// shader computes in half precision, otherwise it falls back to 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three 4-component texels of "conv1" that belong to
// source pixel pos + (x, y), i.e. texel columns 3*px + 0, 3*px + 1 and
// 3*px + 2, scaled by conv1_mul and converted to V4. The pixel coordinate is
// clamped to [0, sz] before the multiplication by 3, so out-of-range reads
// repeat the edge pixel. Expects `pos` and `sz` to be in scope at the
// expansion site.
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tiles, indexed [plane][row][column]: one 10x10 tile per
// input plane (the 8x8 area handled by hook() plus a one-texel border).
shared V4 G[3][10][10];
// Entry point of the conv2 pass. Each invocation handles one source position:
// it helps fill the shared 10x10 tile, accumulates matrix * sample products
// over a 3x3 window of all three tile planes into r0/r1/r2, clamps the results
// to [0, 1] and stores them to three horizontally adjacent texels of out_image.
void hook() {
	// Position of this invocation inside its workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Source position handled by this invocation; consecutive workgroups are
	// 8 positions apart on both axes.
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// First output texel for this position (three texels per position in x).
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid source coordinate; used as the clamp bound by l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. The loop offsets are 0 and 8, so an invocation
	// always writes entry (xy.y, xy.x) and additionally the entries 8 further
	// along an axis while that index is still below 10. The -1 fetch offsets
	// make tile entry (1, 1) correspond to the workgroup's first position, so
	// the tile carries a one-entry border on every side.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All invocations of the workgroup must have written their tile entries
	// before any of them reads the tile below.
	barrier();
	// sP_R_C holds tile plane P at row xy.y + R, column xy.x + C.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Three output accumulators, one per stored texel.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// 3x3 window of planes 0 and 1, centred on tile entry (xy.y + 1, xy.x + 1).
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// For every window entry, add (constant matrix * sample) to each of the
	// three accumulators; every accumulator has its own matrix per entry.
	// NOTE(review): the constants are presumably trained network weights -
	// treat them as data and do not edit or reorder them by hand.
	// Plane 0 contributions:
	r0 += M4(-3.330e-03, -4.898e-02, 3.230e-03, 1.659e-01, 1.514e-03, -7.293e-03, 6.939e-03, 3.448e-01, -1.775e-02, 3.356e-02, -4.240e-02, 2.991e-02, -6.027e-03, 2.759e-04, 4.826e-02, -1.314e-01) * s0_0_0;
	r1 += M4(7.676e-02, 5.992e-02, 4.840e-02, -1.740e-02, 3.027e-03, -1.859e-02, -3.834e-02, -9.202e-02, -3.713e-02, -5.652e-03, -4.783e-02, -7.695e-03, -2.628e-02, 1.418e-02, 1.168e-02, -1.512e-01) * s0_0_0;
	r2 += M4(1.242e-04, 4.714e-02, -1.589e-02, 2.031e-02, -9.169e-03, 2.593e-02, -4.313e-02, -1.421e-01, 7.722e-04, -3.651e-02, -7.602e-03, -6.145e-02, -1.921e-02, -7.730e-02, -4.120e-02, -3.192e-02) * s0_0_0;
	r0 += M4(-5.013e-02, -7.142e-02, 8.666e-02, -1.124e-02, -8.441e-02, -1.179e-01, -7.911e-02, -1.065e-01, -1.236e-02, -1.724e-01, -1.297e-01, 1.384e-01, 2.880e-02, 1.380e-01, -5.725e-02, -1.466e-01) * s0_0_1;
	r1 += M4(3.570e-02, 5.601e-02, 6.462e-02, -1.588e-01, 9.801e-03, -7.460e-02, -7.440e-03, -3.988e-02, -1.007e-02, 9.715e-02, -2.128e-02, 9.155e-02, 5.377e-02, -9.545e-02, -1.726e-02, 1.154e-01) * s0_0_1;
	r2 += M4(4.418e-02, -8.126e-04, -4.803e-02, -1.209e-01, -7.250e-02, -1.772e-01, 9.435e-02, 6.396e-03, -8.220e-02, -2.257e-01, 2.081e-02, 3.385e-02, -2.094e-02, 6.391e-02, 2.287e-02, 1.596e-04) * s0_0_1;
	r0 += M4(1.291e-01, -2.146e-02, -7.798e-02, -1.116e-02, -9.430e-03, 7.469e-04, -1.457e-02, 6.271e-02, 3.645e-02, 1.260e-02, -4.123e-02, 2.080e-02, -1.342e-01, 9.503e-03, -1.246e-01, -2.745e-01) * s0_0_2;
	r1 += M4(-5.046e-02, -9.449e-02, 5.542e-02, -8.230e-02, 8.211e-03, -1.501e-02, -1.115e-02, 9.144e-03, 6.462e-04, 2.210e-02, -1.017e-03, 2.789e-02, 1.900e-02, 4.719e-02, 3.254e-02, 5.548e-02) * s0_0_2;
	r2 += M4(3.000e-02, -3.564e-03, -2.248e-01, 5.110e-02, 2.591e-03, 5.838e-02, 3.754e-02, -1.611e-02, 1.747e-02, 1.729e-02, -6.784e-03, 5.644e-03, -3.891e-02, -2.114e-01, 1.146e-02, 5.114e-02) * s0_0_2;
	r0 += M4(1.173e-02, 5.600e-02, 2.788e-02, -1.164e-01, 8.298e-02, -6.900e-02, 6.496e-03, 2.009e-02, 3.336e-04, 7.880e-02, 3.431e-02, -4.920e-01, -4.798e-02, 1.180e-01, -2.296e-01, -9.435e-02) * s0_1_0;
	r1 += M4(-3.778e-02, 3.342e-02, -3.308e-02, -3.529e-02, 1.676e-01, -1.029e-01, -3.486e-01, 3.174e-01, 2.321e-02, 6.550e-02, 7.543e-02, -2.583e-01, -1.561e-01, -7.291e-02, -6.502e-02, 2.447e-01) * s0_1_0;
	r2 += M4(6.500e-03, -3.772e-02, -4.679e-02, 4.942e-03, 1.679e-02, 1.263e-01, 2.210e-02, 1.444e-01, 3.482e-02, 6.653e-02, -3.512e-02, 2.414e-02, -8.854e-02, -6.843e-02, -1.393e-03, -4.789e-02) * s0_1_0;
	r0 += M4(-2.603e-01, 2.222e-01, 4.369e-01, 9.693e-02, 2.063e-02, -1.872e-01, -3.104e-01, 1.173e-01, -4.895e-02, -1.278e-01, 1.196e-01, -4.195e-01, 4.944e-02, -1.615e-01, -3.869e-01, -2.227e-01) * s0_1_1;
	r1 += M4(7.556e-01, 5.521e-01, 5.862e-01, 2.130e-01, -2.142e-01, -8.772e-02, 3.129e-01, -2.627e-01, -9.301e-02, -2.957e-01, -8.741e-03, -4.693e-01, 1.281e-01, -3.251e-01, -9.573e-02, -2.592e-01) * s0_1_1;
	r2 += M4(-1.945e-01, 1.811e-01, 9.186e-02, 1.869e-01, 2.165e-01, -5.572e-01, 1.988e-01, -1.628e-01, 3.286e-02, 6.145e-02, -7.216e-02, -1.204e-01, 1.528e-02, -4.671e-01, -8.903e-02, 2.173e-02) * s0_1_1;
	r0 += M4(3.754e-01, -1.000e+00, -6.441e-03, 1.401e-01, 6.113e-02, 3.552e-02, -2.169e-02, -6.036e-02, 3.341e-02, 8.583e-02, 1.156e-01, 7.366e-02, -3.305e-01, -6.017e-02, -2.381e-02, -1.346e-01) * s0_1_2;
	r1 += M4(-2.368e-01, 7.300e-02, 2.171e-02, -2.313e-02, 4.936e-02, -6.691e-02, 1.119e-02, 1.132e-02, 2.912e-02, -1.003e-01, -5.463e-03, 2.682e-02, -5.651e-02, -1.307e-01, 2.114e-02, -3.607e-02) * s0_1_2;
	r2 += M4(-8.949e-02, -1.901e-01, 3.237e-01, -3.560e-02, -2.618e-02, -7.145e-03, -4.454e-02, -8.921e-03, -1.882e-02, 1.283e-01, 4.274e-03, 3.090e-02, 1.256e-01, 8.062e-02, -3.982e-03, -2.010e-02) * s0_1_2;
	r0 += M4(4.486e-02, 7.008e-04, 1.079e-01, 9.700e-03, 1.096e-01, -4.953e-02, -9.988e-02, 7.293e-02, 1.697e-02, -5.507e-02, -2.064e-01, -8.507e-02, 6.980e-02, -3.083e-02, -1.687e-01, -1.056e-01) * s0_2_0;
	r1 += M4(2.147e-02, 2.994e-02, -7.127e-03, 3.507e-02, 1.525e-02, -2.437e-02, -1.497e-02, -4.476e-02, 2.347e-02, -1.365e-01, -5.102e-02, 5.171e-02, 7.110e-02, -2.232e-01, 6.352e-02, -1.023e-01) * s0_2_0;
	r2 += M4(-6.030e-02, 2.776e-02, -5.296e-03, 1.505e-02, 9.399e-02, 5.940e-02, 2.063e-02, -4.792e-03, -1.862e-01, -4.779e-02, -2.350e-02, -4.174e-03, 1.192e-01, 7.934e-02, 8.036e-03, 1.732e-02) * s0_2_0;
	r0 += M4(-4.161e-01, -2.544e-02, -5.021e-02, 8.286e-02, -4.082e-01, -4.185e-02, 1.294e-01, 2.466e-02, -7.160e-02, -3.124e-01, -1.504e-01, -1.832e-01, -2.181e-01, 6.344e-02, 1.216e-01, -5.865e-02) * s0_2_1;
	r1 += M4(1.893e-02, -3.108e-02, 2.395e-02, -2.083e-02, -3.679e-02, -6.724e-02, -6.988e-02, -1.066e-03, -7.746e-02, -1.370e-01, -7.664e-02, 2.824e-03, -6.464e-02, 2.936e-02, 9.664e-03, -1.759e-03) * s0_2_1;
	r2 += M4(5.720e-02, 1.379e-02, -1.094e-03, -4.038e-02, -3.044e-01, -5.124e-02, -1.749e-02, 2.298e-02, -3.222e-01, -1.415e-01, -7.626e-02, -4.139e-02, -5.285e-02, -4.413e-02, -1.466e-02, -4.944e-02) * s0_2_1;
	r0 += M4(2.906e-01, -3.291e-01, 3.392e-02, -1.141e-01, -5.669e-02, 1.928e-02, -1.043e-01, -1.229e-02, -6.806e-02, -1.133e-02, -5.395e-02, -2.100e-02, 1.151e-01, -1.745e-02, -4.369e-02, -3.836e-03) * s0_2_2;
	r1 += M4(7.905e-03, -5.945e-02, 3.354e-02, 3.006e-03, -1.245e-03, -6.583e-02, 1.319e-02, -1.578e-02, 1.717e-02, 2.076e-03, 2.454e-05, 1.313e-02, 5.685e-02, 2.216e-02, -6.076e-03, 6.244e-02) * s0_2_2;
	r2 += M4(4.232e-02, 8.451e-02, -6.000e-02, -4.349e-02, 6.372e-02, 2.896e-02, -1.735e-02, 1.459e-02, -3.251e-02, 4.797e-02, -9.649e-03, 1.569e-02, -1.764e-02, 4.649e-02, 5.112e-02, 1.287e-02) * s0_2_2;
	// Plane 1 contributions:
	r0 += M4(3.993e-03, -5.683e-02, 1.334e-01, -1.374e-01, 5.299e-03, -4.475e-02, 3.937e-02, 6.556e-02, -5.810e-02, -1.159e-02, -4.595e-03, -4.631e-01, 3.528e-02, 6.117e-02, -2.710e-02, 1.113e-01) * s1_0_0;
	r1 += M4(1.061e-01, 4.967e-02, 4.442e-02, 1.675e-01, 1.311e-03, 3.189e-02, -1.016e-02, -2.404e-02, -8.142e-03, 7.074e-03, -5.504e-03, 2.160e-02, -2.184e-02, -6.215e-03, -3.113e-02, -3.816e-02) * s1_0_0;
	r2 += M4(-8.182e-03, 4.684e-02, 2.020e-02, 1.392e-01, -1.036e-02, -1.296e-02, -2.106e-02, -2.475e-02, -8.641e-03, -5.477e-02, 5.593e-03, -1.407e-02, 1.352e-02, 5.173e-02, 2.499e-02, -4.111e-02) * s1_0_0;
	r0 += M4(6.485e-03, 4.444e-01, 2.463e-01, -8.695e-02, 2.359e-02, -6.147e-03, -1.173e-03, -1.297e-02, 5.732e-02, 7.421e-03, -4.921e-02, 4.389e-02, -6.024e-02, -6.779e-02, -1.873e-02, 2.436e-01) * s1_0_1;
	r1 += M4(-8.651e-02, -6.408e-04, 3.330e-01, 5.950e-01, 3.476e-03, 2.179e-02, 1.909e-03, 4.410e-02, -5.819e-02, -3.655e-02, -2.817e-02, -3.914e-02, 6.098e-03, 3.551e-02, 3.932e-02, 1.190e-01) * s1_0_1;
	r2 += M4(6.190e-02, 4.062e-01, 4.894e-02, 5.387e-01, -2.424e-02, -5.555e-02, 1.594e-02, 3.009e-02, -7.348e-03, -1.181e-01, -1.244e-01, -2.609e-02, -5.629e-03, -1.465e-01, -1.961e-02, 1.003e-01) * s1_0_1;
	r0 += M4(-1.091e-01, 5.092e-03, -2.550e-02, 1.508e-02, -3.842e-02, 1.071e-02, 2.765e-03, -2.700e-02, 3.061e-03, 1.375e-02, 1.819e-02, 1.331e-02, 1.460e-01, -5.223e-02, -6.063e-02, 3.591e-02) * s1_0_2;
	r1 += M4(1.178e-02, 5.716e-02, 5.430e-04, 6.665e-02, -4.327e-03, -5.091e-03, -1.959e-02, -1.958e-03, -3.267e-04, -3.594e-02, 2.205e-03, 1.985e-02, 3.194e-03, -7.224e-02, 3.661e-02, -6.177e-02) * s1_0_2;
	r2 += M4(-1.178e-03, -1.411e-01, 5.224e-02, 4.965e-02, 1.917e-02, 6.068e-02, 1.407e-03, -1.646e-02, -2.391e-02, 1.841e-02, 6.838e-03, 1.414e-02, 5.403e-02, 1.411e-01, -3.487e-02, -2.132e-03) * s1_0_2;
	r0 += M4(-7.255e-02, 8.425e-02, 6.469e-02, 1.409e-01, -2.465e-02, -8.605e-02, 6.399e-02, 3.069e-02, -9.784e-02, 2.839e-01, -1.536e-01, 3.240e-01, 1.170e-01, -3.738e-02, -3.745e-02, -1.053e-01) * s1_1_0;
	r1 += M4(-2.915e-02, 1.762e-01, 1.633e-02, -1.567e-02, -2.692e-02, 9.269e-02, -5.065e-02, 2.559e-02, -2.932e-01, -5.153e-03, -5.779e-03, -3.077e-01, 1.521e-01, -1.674e-01, 7.979e-02, 7.518e-03) * s1_1_0;
	r2 += M4(6.969e-02, -2.373e-02, 1.199e-02, -4.400e-02, 6.192e-03, -8.008e-03, -4.720e-03, -9.885e-02, -5.950e-02, -1.667e-01, -6.930e-02, -1.464e-01, 2.466e-03, 1.533e-01, 2.397e-02, 1.175e-01) * s1_1_0;
	r0 += M4(2.704e-01, 3.478e-02, -4.578e-02, 4.189e-01, 6.227e-02, 2.167e-02, -1.958e-01, -3.389e-03, 8.772e-02, -1.000e+00, -4.788e-01, -2.043e-01, -4.414e-01, -4.574e-01, 7.278e-02, -4.500e-01) * s1_1_1;
	r1 += M4(-7.698e-02, -1.451e-01, -3.442e-01, 2.309e-01, -6.208e-02, -1.079e-01, -3.333e-02, -1.330e-01, -1.461e-01, -1.502e-01, -2.494e-01, -2.868e-01, -4.231e-02, 9.449e-02, 4.372e-01, -3.898e-01) * s1_1_1;
	r2 += M4(-1.192e-01, -3.096e-01, -1.094e-01, 1.538e-01, -7.799e-03, -2.686e-01, -1.837e-01, -4.281e-01, -9.650e-02, -4.341e-01, -6.989e-02, -1.237e-01, 1.367e-01, 2.626e-01, 1.085e-01, -1.612e-01) * s1_1_1;
	r0 += M4(-2.026e-01, -1.659e-01, -1.758e-03, 5.945e-03, -3.366e-02, 4.531e-02, 1.440e-01, 9.229e-02, -1.813e-01, -2.094e-01, -1.650e-01, -1.603e-01, 3.983e-01, 2.102e-01, 1.573e-01, -3.011e-02) * s1_1_2;
	r1 += M4(-2.855e-02, 7.873e-02, 8.369e-02, -4.751e-02, 6.328e-02, 4.859e-02, -7.054e-03, 1.443e-01, -5.105e-02, -1.171e-01, -9.292e-02, -1.505e-01, 6.958e-02, -5.539e-02, 1.565e-03, 9.601e-02) * s1_1_2;
	r2 += M4(1.972e-02, -1.500e-02, 2.899e-02, -8.883e-03, -1.719e-02, 1.638e-01, 1.450e-01, 7.146e-02, -3.205e-02, -1.679e-01, -1.736e-01, -1.179e-01, -8.246e-02, 2.177e-01, 6.421e-02, 6.802e-02) * s1_1_2;
	r0 += M4(-4.234e-02, -4.640e-02, 1.105e-01, 5.485e-03, 7.172e-02, -1.541e-01, 5.002e-02, -1.653e-01, -1.336e-01, 1.876e-01, 1.137e-01, 5.421e-02, 7.110e-02, 3.725e-02, -1.114e-01, -4.818e-02) * s1_2_0;
	r1 += M4(-9.284e-04, 8.669e-03, -3.569e-02, -4.549e-02, 5.844e-02, 1.224e-01, 5.151e-02, 3.941e-02, -1.304e-01, 6.848e-02, -1.052e-02, 5.352e-02, -9.528e-03, 4.095e-04, 3.405e-02, 5.425e-02) * s1_2_0;
	r2 += M4(4.289e-02, 3.718e-02, 2.415e-03, 5.789e-02, 3.057e-02, 7.364e-02, 3.121e-02, 6.040e-02, -1.872e-02, -7.987e-02, -1.686e-02, -3.594e-02, -6.366e-02, -9.828e-03, 1.571e-02, -4.670e-02) * s1_2_0;
	r0 += M4(1.961e-01, -2.552e-01, -5.455e-02, -1.738e-01, 1.117e-01, 3.554e-01, 4.856e-01, 9.707e-02, -1.159e-02, 1.352e-01, -8.307e-02, 1.114e-01, -2.483e-01, 1.875e-01, 1.129e-01, 1.441e-01) * s1_2_1;
	r1 += M4(8.129e-02, -1.380e-01, 1.502e-01, -1.280e-01, -1.551e-02, 6.476e-01, -2.238e-02, 3.213e-01, -1.523e-01, 1.082e-02, -7.375e-02, 9.171e-03, -5.609e-02, 2.034e-01, -1.231e-01, 1.597e-01) * s1_2_1;
	r2 += M4(3.447e-01, 1.157e-01, 5.652e-02, 3.856e-03, 3.913e-01, 2.464e-01, 1.686e-01, 1.363e-01, -1.913e-01, -2.206e-01, -7.254e-02, -5.904e-02, -3.464e-01, -1.043e-01, -2.675e-02, 1.321e-02) * s1_2_1;
	r0 += M4(-7.327e-02, 8.780e-02, 2.885e-01, -9.624e-02, -2.270e-02, -2.037e-01, -1.114e-01, 3.308e-02, -5.595e-01, 8.213e-02, -4.431e-01, 5.461e-02, 9.365e-02, -5.674e-02, -2.305e-01, 1.142e-01) * s1_2_2;
	r1 += M4(-2.736e-02, -3.531e-02, -4.579e-02, -1.961e-02, 3.306e-02, 3.644e-02, 9.643e-02, 7.718e-02, -1.074e-02, 4.565e-02, 5.944e-03, 1.054e-02, 2.687e-02, 1.431e-01, 7.446e-02, 5.372e-02) * s1_2_2;
	r2 += M4(-4.158e-02, -4.948e-03, -1.963e-02, -2.476e-02, 1.295e-01, 1.274e-01, 1.440e-01, 8.482e-03, -3.240e-02, -1.299e-01, -9.047e-04, -9.863e-03, 1.013e-01, 6.020e-02, 7.105e-02, 3.515e-02) * s1_2_2;
	// The s0_* variables are reused here for the 3x3 window of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 contributions:
	r0 += M4(7.951e-02, 5.993e-02, 1.730e-02, 2.051e-01, 1.884e-02, -1.891e-01, -5.956e-03, 3.177e-02, -2.149e-04, 5.165e-02, -7.440e-02, -2.949e-01, 8.070e-03, -1.364e-02, 2.769e-02, -3.175e-01) * s0_0_0;
	r1 += M4(1.374e-02, -5.394e-02, -2.870e-03, 4.347e-02, -6.638e-02, 6.181e-02, -1.652e-02, 2.434e-02, -9.480e-02, -5.197e-02, -4.236e-02, 1.369e-01, 1.161e-03, 6.169e-03, 1.022e-02, 1.004e-02) * s0_0_0;
	r2 += M4(1.291e-02, 1.020e-01, 4.150e-02, 4.480e-02, 5.182e-04, 1.636e-02, 8.446e-03, 1.836e-02, -3.781e-02, -1.290e-02, 5.774e-02, 1.671e-02, -1.537e-02, 4.188e-02, 1.201e-02, 1.195e-02) * s0_0_0;
	r0 += M4(-2.129e-02, -9.258e-02, 8.374e-02, -9.685e-02, -1.934e-02, -8.416e-02, -2.749e-02, 1.218e-01, 1.512e-02, -1.714e-01, -7.346e-02, 7.513e-02, -6.127e-02, 6.592e-03, -1.900e-02, 4.187e-02) * s0_0_1;
	r1 += M4(2.068e-02, 2.368e-02, -1.590e-02, 6.229e-02, -1.293e-02, 7.926e-02, -1.455e-02, -6.996e-02, -1.102e-01, 1.017e-01, -6.497e-02, 1.254e-01, -9.291e-03, 6.490e-03, -3.455e-03, 7.658e-03) * s0_0_1;
	r2 += M4(7.071e-02, 1.385e-01, 4.605e-02, 5.787e-03, -7.890e-02, -2.651e-01, -3.677e-02, -6.931e-02, -1.074e-01, -1.524e-01, 5.040e-02, -7.262e-02, 9.247e-03, -5.576e-02, -1.448e-02, 9.094e-03) * s0_0_1;
	r0 += M4(5.266e-03, 4.805e-02, -1.069e-02, -8.791e-02, -1.215e-02, -5.751e-02, 1.760e-02, -1.726e-02, -1.331e-01, 6.776e-02, 5.871e-03, -1.863e-01, -5.309e-02, 1.094e-02, 1.468e-02, -2.785e-02) * s0_0_2;
	r1 += M4(-2.990e-02, -6.486e-03, -1.037e-02, 8.435e-03, 1.987e-02, -1.752e-02, -2.361e-02, -1.554e-02, 1.561e-02, 1.133e-02, -3.587e-02, -4.263e-02, 2.112e-03, -1.706e-02, -1.557e-02, -4.463e-02) * s0_0_2;
	r2 += M4(3.467e-02, -1.027e-01, -5.110e-02, 2.542e-02, -1.779e-02, 1.475e-02, -7.690e-02, -2.163e-02, -6.591e-02, 7.396e-02, -1.228e-02, -5.964e-02, -1.308e-02, 2.975e-02, -6.114e-03, -3.110e-02) * s0_0_2;
	r0 += M4(-5.321e-02, 6.345e-02, -8.740e-02, -5.431e-01, 1.087e-01, -8.274e-02, -1.204e-01, -2.680e-01, -8.695e-03, -4.849e-02, -1.151e-01, 1.895e-01, 8.515e-05, -1.430e-01, -1.316e-01, -1.000e+00) * s0_1_0;
	r1 += M4(-1.332e-01, -1.570e-01, 5.646e-02, -1.802e-01, -1.868e-01, -1.190e-01, -4.918e-02, -3.563e-01, 2.468e-02, -1.838e-01, -3.311e-03, -1.621e-01, -1.456e-01, -1.851e-01, -9.463e-02, -1.391e-01) * s0_1_0;
	r2 += M4(-4.541e-02, -8.377e-02, -1.785e-02, -2.557e-02, 3.277e-02, -4.023e-02, -1.606e-02, -1.350e-01, 4.589e-02, -7.020e-02, -2.325e-02, -7.219e-02, 6.207e-03, -1.794e-01, -3.218e-02, -7.138e-02) * s0_1_0;
	r0 += M4(-7.508e-02, 4.962e-01, 1.508e-01, 9.073e-02, -4.343e-01, -3.380e-01, 1.485e-01, -6.057e-02, -9.526e-02, 1.404e-01, -4.938e-01, -2.028e-01, -1.270e-01, -2.562e-01, -3.293e-01, -2.996e-01) * s0_1_1;
	r1 += M4(2.231e-01, -9.477e-02, 3.030e-01, 2.823e-01, 3.957e-02, -1.413e-01, -6.335e-02, 6.117e-02, -7.883e-02, -8.443e-01, -7.222e-02, -2.740e-01, -9.404e-02, -3.716e-01, -2.088e-01, -2.783e-01) * s0_1_1;
	r2 += M4(-4.948e-02, 6.816e-01, 5.479e-01, 1.579e-01, 7.498e-02, 9.679e-02, 5.070e-02, -5.299e-02, 1.979e-01, -3.612e-01, -1.803e-01, -1.150e-01, 1.315e-02, -2.504e-01, -1.425e-01, -4.512e-02) * s0_1_1;
	r0 += M4(3.678e-02, -1.257e-01, -1.524e-02, 1.033e-01, -1.069e-01, -4.898e-02, -4.729e-02, -1.207e-03, -1.705e-01, -1.303e-01, -1.029e-02, -2.451e-02, -2.470e-01, 8.330e-02, 3.876e-02, 5.727e-02) * s0_1_2;
	r1 += M4(-4.606e-02, 6.875e-02, -9.717e-03, -4.147e-02, -1.945e-02, -2.694e-04, -2.121e-03, -4.176e-02, -4.024e-02, 5.343e-02, -7.201e-02, -1.243e-01, -3.049e-03, 4.265e-02, -1.277e-02, -1.800e-02) * s0_1_2;
	r2 += M4(2.946e-02, -1.092e-01, 3.485e-02, -1.354e-02, -3.022e-02, -1.140e-01, -5.921e-02, 5.186e-03, -1.952e-02, -2.193e-01, -1.855e-01, -1.125e-01, -2.015e-02, -4.287e-02, -4.120e-03, -2.906e-02) * s0_1_2;
	r0 += M4(-1.733e-01, -1.049e-01, 7.001e-02, -2.295e-02, 1.959e-02, -1.409e-02, 8.926e-02, 2.035e-01, -8.518e-02, 1.201e-02, 3.011e-02, 3.509e-02, 3.561e-02, -1.304e-01, -2.543e-01, 3.704e-02) * s0_2_0;
	r1 += M4(3.142e-02, 8.179e-02, -5.606e-02, -5.840e-03, -1.763e-01, 1.155e-01, -7.935e-02, 1.188e-01, -6.424e-02, 1.136e-01, 9.362e-03, 2.519e-02, -1.147e-01, -1.849e-01, -4.338e-02, -9.180e-02) * s0_2_0;
	r2 += M4(-1.839e-01, -1.296e-02, -7.182e-03, -5.471e-02, -8.876e-02, -1.142e-01, -7.323e-02, -1.341e-02, 2.432e-02, -1.846e-02, 1.368e-02, -1.270e-03, -3.643e-02, -2.251e-01, -4.318e-02, -9.203e-02) * s0_2_0;
	r0 += M4(-5.218e-02, 4.811e-02, -3.110e-01, -6.304e-02, 1.270e-01, -1.822e-01, -2.485e-01, -5.267e-02, -8.249e-02, 2.033e-02, -8.713e-02, -9.756e-03, -1.517e-01, -1.647e-01, -2.398e-01, 4.528e-02) * s0_2_1;
	r1 += M4(6.062e-02, 4.295e-02, 3.174e-02, 1.129e-02, 1.123e-01, -1.124e-01, -4.261e-02, -1.035e-02, -8.112e-03, -8.753e-02, -1.593e-02, -1.625e-02, -5.272e-02, -2.927e-01, -1.180e-01, -6.662e-02) * s0_2_1;
	r2 += M4(9.910e-02, 1.547e-01, 5.190e-02, -2.366e-02, -1.277e-01, -1.150e-02, -7.683e-02, 1.709e-02, -8.321e-02, 1.159e-02, 1.079e-02, 6.523e-03, -1.642e-01, -2.226e-01, -5.299e-02, -1.341e-01) * s0_2_1;
	r0 += M4(5.827e-02, 1.794e-02, 1.523e-01, -2.491e-02, -1.101e-01, -4.773e-02, -9.541e-03, -1.743e-02, -1.000e+00, 1.091e-01, 1.072e-01, 3.430e-02, -5.987e-01, 4.340e-02, -1.765e-02, -1.252e-02) * s0_2_2;
	r1 += M4(-1.738e-02, 9.102e-02, -4.100e-02, 2.205e-03, -1.752e-02, -1.929e-02, 8.239e-03, -3.164e-02, -6.043e-02, 6.190e-02, -4.275e-02, -7.590e-03, -4.193e-03, -2.990e-03, 8.114e-03, 2.463e-04) * s0_2_2;
	r2 += M4(-1.180e-01, -2.738e-02, -2.346e-02, -3.539e-04, 7.717e-03, 2.570e-03, 1.519e-02, -2.741e-02, -8.850e-02, -2.611e-02, -4.461e-02, 1.185e-02, -7.628e-02, 2.359e-02, -2.527e-03, 7.269e-04) * s0_2_2;
	// Clamp each accumulator to [0, 1] and store it (widened to vec4); the
	// three results go to x offsets 0, 1 and 2 from opos in the same row.
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_2x12_SOFT] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv2
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass summary (from the directives above): a compute pass hooked on LUMA that
// binds the textures conv2 and LUMA and produces a 1-component result twice as
// wide and twice as tall as LUMA. No SAVE directive is given for this pass.
// NOTE(review): without SAVE the result presumably replaces the hooked LUMA
// texture - confirm against the host's user-shader documentation.
// The WHEN expression is written in postfix order and reads as
// (OUTPUT.w > LUMA.w * 1.2) * (OUTPUT.h > LUMA.h * 1.2).
// NOTE(review): the host appears to stop reading pass directives at the first
// line that is not a directive, so keep the directive lines contiguous and put
// commentary only below them - confirm against the host's shader parser.

// Select 16-bit float types when the explicit-arithmetic-types extension is
// available; otherwise fall back to the regular 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one conv2 texel for the source position pos + (x, y),
// scaled by conv2_mul and converted to V4. The position is clamped to
// [0, sz]; its x coordinate is then multiplied by 3 and offset by 0, 1 or 2,
// so the three macros read three horizontally adjacent texels of conv2.
// `pos` and `sz` are not parameters: they must be in scope where the macro is
// expanded.
#define l0(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile: 3 planes of 10x10 V4 entries. hook() fills plane 0, 1
// and 2 from l0, l1 and l2 respectively.
shared V4 G[3][10][10];
// Entry point of the out-shuffle pass. Each invocation handles one source
// position: it helps fill the shared 10x10 tile, accumulates matrix * sample
// products over a 3x3 window of all three tile planes into a single V4, and
// writes the four components to a 2x2 block of out_image, each added to a
// LUMA sample taken at the corresponding output pixel.
void hook() {
	// Position of this invocation inside its workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Source position handled by this invocation; consecutive workgroups are
	// 8 positions apart on both axes.
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Top-left texel of the 2x2 output block for this position.
	ivec2 opos = pos * ivec2(2, 2);
	// Largest valid source coordinate; used as the clamp bound by l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. The loop offsets are 0 and 8, so an invocation
	// always writes entry (xy.y, xy.x) and additionally the entries 8 further
	// along an axis while that index is still below 10. The -1 fetch offsets
	// make tile entry (1, 1) correspond to the workgroup's first position, so
	// the tile carries a one-entry border on every side.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All invocations of the workgroup must have written their tile entries
	// before any of them reads the tile below.
	barrier();
	// sP_R_C holds tile plane P at row xy.y + R, column xy.x + C.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Single accumulator; its x/y/z/w components become the four output texels.
	V4 r0;
	r0 = V4(0.0);
	// 3x3 window of planes 0 and 1, centred on tile entry (xy.y + 1, xy.x + 1).
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// For every window entry, add (constant matrix * sample) to r0.
	// NOTE(review): the constants are presumably trained network weights -
	// treat them as data and do not edit or reorder them by hand.
	// Plane 0 contributions:
	r0 += M4(4.249e-01, -3.457e-01, 7.591e-02, -1.341e-01, 1.134e-02, -4.834e-03, -6.176e-03, -2.530e-04, -8.778e-02, 1.381e-02, -2.979e-04, 5.270e-04, 5.636e-04, -3.071e-04, -6.583e-04, -3.970e-04) * s0_0_0;
	r0 += M4(-2.662e-03, 2.157e-02, -5.385e-04, 1.591e-02, 6.461e-02, 5.461e-02, -9.771e-03, -9.916e-03, 8.772e-02, -7.791e-02, 2.913e-04, 2.959e-02, 4.093e-03, 8.392e-03, 1.747e-03, 4.878e-04) * s0_0_1;
	r0 += M4(-3.848e-06, -4.384e-06, 4.099e-07, -6.047e-06, -3.164e-03, 2.138e-02, 1.293e-03, -4.130e-03, -1.016e-03, 1.246e-02, -2.369e-04, -4.543e-03, -3.947e-05, -2.621e-03, 2.396e-05, -8.012e-05) * s0_0_2;
	r0 += M4(1.246e-02, -4.553e-02, 2.425e-01, -1.907e-01, 3.525e-02, -5.991e-03, 2.490e-02, -1.336e-02, 1.292e-01, 8.481e-03, -6.671e-02, 2.906e-02, -4.638e-04, 1.900e-03, 1.423e-02, -6.533e-03) * s0_1_0;
	r0 += M4(-3.462e-03, -1.148e-02, 1.663e-03, 1.890e-03, 9.805e-02, 1.723e-01, 1.937e-01, 2.608e-01, 1.248e-01, 1.714e-01, 5.971e-03, -5.566e-01, -7.629e-02, -1.304e-01, -1.516e-02, -3.602e-03) * s0_1_1;
	r0 += M4(4.661e-06, -2.822e-05, -6.296e-07, 3.450e-06, -8.437e-03, 9.301e-03, -1.314e-02, 2.877e-02, 9.105e-03, 6.973e-02, -1.560e-04, 3.828e-02, -9.424e-03, 4.507e-02, 5.978e-03, -9.546e-03) * s0_1_2;
	r0 += M4(-6.098e-04, -4.599e-05, 1.205e-02, -1.023e-02, -2.526e-03, 3.622e-03, 8.231e-03, 2.374e-03, -1.785e-03, -2.773e-03, 2.912e-02, -7.355e-04, 5.620e-03, 9.968e-04, 3.455e-02, 3.115e-03) * s0_2_0;
	r0 += M4(-3.546e-05, -4.312e-04, -6.319e-05, -2.592e-03, -1.796e-02, -2.253e-02, -3.143e-02, -2.316e-02, 6.567e-03, 2.983e-04, 5.040e-02, 1.299e-02, -2.144e-02, 1.245e-02, -4.187e-01, 2.167e-02) * s0_2_1;
	r0 += M4(-5.920e-07, 2.248e-07, -1.374e-07, -5.938e-07, 9.086e-04, -3.072e-03, -1.539e-03, 4.156e-03, 1.299e-03, -7.871e-04, 4.377e-04, 3.998e-02, 1.323e-02, -7.030e-02, -1.234e-02, -7.450e-02) * s0_2_2;
	// Plane 1 contributions:
	r0 += M4(2.481e-02, 1.660e-03, -2.832e-04, -2.047e-03, 2.554e-02, 5.075e-03, -5.785e-03, -6.690e-04, -3.446e-02, -6.909e-04, -1.838e-03, 1.554e-03, 6.578e-03, 7.055e-04, -3.060e-04, 3.240e-05) * s1_0_0;
	r0 += M4(-4.244e-02, 1.219e-01, -4.918e-03, 1.674e-03, 8.569e-02, 1.189e-01, -1.377e-03, 1.437e-03, -6.777e-02, -1.724e-01, -2.668e-02, 7.020e-03, 5.608e-02, 1.250e-02, -3.215e-03, 3.796e-03) * s1_0_1;
	r0 += M4(-1.082e-03, 5.718e-03, 2.147e-03, -1.112e-02, -9.906e-04, 2.704e-02, 2.456e-04, -7.119e-04, 9.025e-04, 3.168e-02, -1.690e-04, 7.354e-03, 6.277e-03, 2.999e-02, -3.101e-03, 9.560e-04) * s1_0_2;
	r0 += M4(6.648e-02, 4.312e-03, 5.718e-02, 2.617e-03, -1.753e-01, -1.251e-03, -3.009e-02, -5.551e-03, -9.351e-02, -4.959e-03, -7.695e-02, 7.611e-04, -9.992e-03, -3.980e-03, 9.832e-04, 4.663e-03) * s1_1_0;
	r0 += M4(-4.010e-01, -2.975e-02, -3.799e-01, 3.364e-01, 1.406e-02, -3.897e-01, 3.442e-01, 2.920e-01, 2.132e-01, 2.634e-02, 1.631e-01, -4.756e-01, -6.426e-01, -1.333e-02, 2.172e-01, 4.797e-02) * s1_1_1;
	r0 += M4(4.236e-04, 1.509e-01, 3.787e-03, 1.165e-01, -2.784e-03, 9.932e-02, -1.619e-03, 9.244e-02, 1.189e-03, 1.031e-01, 2.305e-03, 1.337e-01, 9.335e-03, -2.074e-01, -2.405e-02, 8.515e-02) * s1_1_2;
	r0 += M4(-3.805e-04, -1.806e-03, 7.601e-03, 3.036e-03, 2.002e-03, 2.594e-03, -5.504e-02, 1.433e-03, 5.855e-04, 1.177e-03, -1.248e-02, -4.754e-03, 1.528e-03, -1.541e-03, 6.347e-03, -2.104e-03) * s1_2_0;
	r0 += M4(1.309e-03, 6.058e-05, -1.001e-01, -7.154e-02, -3.519e-03, 4.345e-03, -8.674e-02, -1.305e-01, 1.125e-03, -1.404e-03, 6.427e-02, 9.282e-02, 9.884e-03, 3.839e-03, 4.545e-04, 7.587e-02) * s1_2_1;
	r0 += M4(1.088e-03, -2.846e-03, -5.854e-03, 1.935e-02, 1.838e-03, 1.435e-02, -4.744e-03, 7.629e-03, 7.239e-05, 8.069e-04, -9.010e-05, 4.534e-02, -1.518e-04, -9.850e-03, 6.338e-03, -4.554e-03) * s1_2_2;
	// The s0_* variables are reused here for the 3x3 window of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 contributions:
	r0 += M4(9.171e-02, 1.642e-02, -9.102e-03, 5.379e-04, 1.775e-02, 6.663e-04, 1.678e-02, 1.738e-03, 2.972e-02, -9.126e-03, 4.898e-03, -7.337e-04, 6.405e-04, -2.220e-03, 2.865e-03, -8.054e-04) * s0_0_0;
	r0 += M4(2.374e-01, 2.488e-01, 6.940e-03, 4.773e-03, -1.492e-01, -5.192e-02, -3.106e-03, -5.454e-03, 5.211e-02, 5.310e-02, 1.811e-02, -1.628e-02, -3.235e-02, 6.131e-02, 1.821e-02, -6.660e-03) * s0_0_1;
	r0 += M4(-7.334e-03, 4.469e-02, 1.575e-03, -4.722e-03, -4.113e-04, -4.259e-02, -5.982e-04, 2.925e-03, -4.922e-04, -1.287e-02, 1.674e-05, -3.481e-04, -8.257e-03, -5.065e-02, 1.732e-03, -3.609e-03) * s0_0_2;
	r0 += M4(-1.411e-02, -5.164e-03, -9.227e-02, 1.208e-02, -1.164e-02, -5.501e-03, 5.257e-03, -4.676e-03, 3.047e-01, 1.672e-02, 2.768e-01, -3.095e-03, 1.004e-02, 1.037e-03, 3.971e-02, -8.777e-03) * s0_1_0;
	r0 += M4(7.288e-03, -1.042e-02, -2.433e-01, -3.641e-01, 7.553e-02, 5.586e-01, -2.627e-01, 2.050e-01, -1.441e-01, -2.268e-01, -9.550e-02, 1.557e-01, 3.564e-01, -2.725e-01, -3.482e-01, 1.332e-01) * s0_1_1;
	r0 += M4(1.286e-03, 1.689e-02, 3.874e-04, 1.676e-02, 3.601e-03, -3.588e-02, 1.211e-02, -1.018e-01, 2.605e-04, -7.088e-02, -8.821e-04, -8.037e-02, -1.077e-02, 1.284e-01, 2.640e-02, -1.526e-01) * s0_1_2;
	r0 += M4(9.674e-04, -3.692e-04, -2.082e-04, -1.211e-03, 2.792e-03, 1.520e-03, 2.494e-03, -1.048e-03, -2.547e-03, -4.282e-03, 3.184e-02, 6.918e-03, -5.338e-03, 7.582e-04, 1.888e-02, -4.045e-03) * s0_2_0;
	r0 += M4(1.709e-04, 1.016e-03, 3.579e-04, 3.842e-03, -4.962e-03, -8.560e-03, -1.517e-02, 7.340e-02, -1.869e-04, -1.293e-02, -4.600e-02, -1.850e-01, -3.618e-03, 1.018e-02, 2.329e-01, 1.202e-01) * s0_2_1;
	r0 += M4(-1.519e-04, -2.562e-04, -2.240e-05, -9.021e-04, 4.172e-04, -1.682e-03, 2.635e-04, -3.317e-02, -6.340e-04, 6.472e-04, 1.322e-04, -3.302e-02, -1.102e-03, 8.252e-04, 1.285e-03, 3.800e-02) * s0_2_2;
	// Constant per-component offset added after all window contributions.
	r0 += V4(-1.415e-09, 4.971e-09, -5.230e-12, 5.916e-11);
	// Half of LUMA_pt. NOTE(review): presumably the size of one output pixel in
	// normalized LUMA coordinates, given the output is declared twice as large
	// as LUMA - confirm the meaning of LUMA_pt in the host documentation.
	vec2 opt = 0.5 * LUMA_pt;
	// Sampling coordinate for the centre of output texel opos, in units of opt.
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Write the 2x2 block: r0.x/.y/.z/.w go to offsets (0,0), (1,0), (0,1) and
	// (1,1). Each value is the accumulator component plus the red channel of
	// LUMA_tex sampled one opt step further per offset; the result is stored in
	// the first component, with 0.0, 0.0, 1.0 in the others. No clamp is
	// applied here.
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
